library(ggplot2)
library(RColorBrewer)
library(cowplot)
library(tidyverse)
library(dplyr)
library(Seurat)
library(SCpubr)
library(patchwork)
library(scico)
setwd("~/Projects/HumanThymusProject/")
source("~/Projects/HumanThymusProject/scripts/colors_universal.R")
The human and murine thymic seurat objects come from the Thymic Atlas generated by Park et al.. See script download_park_thymic_atlas.Rmd for how we obtained these seurat objects with appropriate metadata.
# import human & murine thymic atlases
seur.human <- readRDS("./data_github/park_dataset/park_seurat_human.rds")
seur.mouse <- readRDS("./data_github/park_dataset/park_seurat_mouse.rds")
# remove parathyroid cells
seur.human <- subset(seur.human, Anno_level_3 != "Epi_GCM2")
# Create a new level of annotation
seur.human@meta.data$Anno_curated <- seur.human@meta.data$Anno_level_1
seur.human@meta.data$Anno_curated <- case_when(
seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3=="cTEC" ~ "cTEC",
# seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3%in%c("mTEC", "TEC(myo)", "TEC(neuro)") ~ "mTEC",
seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3=="mTEC" ~ "mTEC",
seur.human@meta.data$Anno_curated=="TEC" & seur.human@meta.data$Anno_level_3!="cTEC" & seur.human@meta.data$Anno_level_3!="mTEC" ~ "TEC_other",
seur.human@meta.data$Anno_curated=="T" & seur.human@meta.data$Anno_level_2=="DN" ~ "DN",
seur.human@meta.data$Anno_curated=="T" & seur.human@meta.data$Anno_level_2=="DP" ~ "DP",
seur.human@meta.data$Anno_curated=="T" & seur.human@meta.data$Anno_level_2=="SP" ~ "SP",
seur.human@meta.data$Anno_curated=="Innate_T" & seur.human@meta.data$Anno_level_3=="NKT" ~ "SP",
seur.human@meta.data$Anno_curated=="Innate_T" & seur.human@meta.data$Anno_level_3=="γδT" ~ "γδT",
seur.human@meta.data$Anno_curated=="Endo" ~ "Endothelial",
seur.human@meta.data$Anno_curated=="Ery" ~ "Erythrocyte",
seur.human@meta.data$Anno_curated=="Mesen" ~ "Mesenchymal",
seur.human@meta.data$Anno_curated=="Mgk" ~ "Megakaryocyte",
.default=seur.human@meta.data$Anno_curated
)
# sanity checks
table(seur.human@meta.data$Anno_curated, useNA="ifany")
##
## B cTEC DN DP Endothelial
## 5082 10156 42474 108418 115
## Erythrocyte HSC Innate_lymphoid Mast Megakaryocyte
## 644 501 2176 148 36
## Mesenchymal mTEC Myeloid SP TEC_other
## 21290 6448 4801 50476 480
## γδT
## 2582
# expect:
# B 5,082
# Endo 115
# Ery 644
# HSC 501
# Innate_lymphoid 2,176
# γδT 2,582
# Mast 148
# Mesen 21,290
# Mgk 36
# Myeloid 4,801
# DN 42,474
# DP 108,418
# SP 50,476
# cTEC 10,156
# mTEC 6,448
# TEC_other 480
table(seur.human@meta.data[,c("Anno_curated", "Anno_level_1")], useNA="ifany")
## Anno_level_1
## Anno_curated B Endo Ery HSC Innate_T Innate_lymphoid Mast
## B 5082 0 0 0 0 0 0
## cTEC 0 0 0 0 0 0 0
## DN 0 0 0 0 0 0 0
## DP 0 0 0 0 0 0 0
## Endothelial 0 115 0 0 0 0 0
## Erythrocyte 0 0 644 0 0 0 0
## HSC 0 0 0 501 0 0 0
## Innate_lymphoid 0 0 0 0 0 2176 0
## Mast 0 0 0 0 0 0 148
## Megakaryocyte 0 0 0 0 0 0 0
## Mesenchymal 0 0 0 0 0 0 0
## mTEC 0 0 0 0 0 0 0
## Myeloid 0 0 0 0 0 0 0
## SP 0 0 0 0 349 0 0
## TEC_other 0 0 0 0 0 0 0
## γδT 0 0 0 0 2582 0 0
## Anno_level_1
## Anno_curated Mesen Mgk Myeloid T TEC
## B 0 0 0 0 0
## cTEC 0 0 0 0 10156
## DN 0 0 0 42474 0
## DP 0 0 0 108418 0
## Endothelial 0 0 0 0 0
## Erythrocyte 0 0 0 0 0
## HSC 0 0 0 0 0
## Innate_lymphoid 0 0 0 0 0
## Mast 0 0 0 0 0
## Megakaryocyte 0 36 0 0 0
## Mesenchymal 21290 0 0 0 0
## mTEC 0 0 0 0 6448
## Myeloid 0 0 4801 0 0
## SP 0 0 0 50127 0
## TEC_other 0 0 0 0 480
## γδT 0 0 0 0 0
# define order to plot clusters
seur.human@meta.data$Anno_curated <- factor(
seur.human@meta.data$Anno_curated,
levels = c(
"DN",
"DP",
"SP",
"γδT",
"cTEC",
"mTEC",
"TEC_other",
"Innate_lymphoid",
"B",
"Myeloid",
"Mesenchymal",
"HSC",
"Mast",
"Erythrocyte",
"Megakaryocyte",
"Endothelial"
)
)
Determine which clusters are most abundant.
# see how many cells there are per cluster
as.data.frame(table(seur.human$Anno_curated)) %>%
mutate(totalcells=sum(Freq),
percentcells=Freq*100/totalcells) %>%
arrange(percentcells)
## Var1 Freq totalcells percentcells
## 1 Megakaryocyte 36 255827 0.01407201
## 2 Endothelial 115 255827 0.04495225
## 3 Mast 148 255827 0.05785160
## 4 TEC_other 480 255827 0.18762679
## 5 HSC 501 255827 0.19583547
## 6 Erythrocyte 644 255827 0.25173262
## 7 Innate_lymphoid 2176 255827 0.85057480
## 8 γδT 2582 255827 1.00927580
## 9 Myeloid 4801 255827 1.87665884
## 10 B 5082 255827 1.98649869
## 11 mTEC 6448 255827 2.52045328
## 12 cTEC 10156 255827 3.96987026
## 13 Mesenchymal 21290 255827 8.32203012
## 14 DN 42474 255827 16.60262599
## 15 SP 50476 255827 19.73052102
## 16 DP 108418 255827 42.37942047
hu_clusters_abundant <- levels(seur.human@meta.data$Anno_curated)[!levels(seur.human@meta.data$Anno_curated) %in% c("Megakaryocyte", "Endothelial", "Mast", "HSC", "Erythrocyte")]
Plot UMAP with cluster annotation.
celltypes_col <- c(
"mTEC" = "#CE3F37",
"cTEC" = "#8C6143",
"TEC_other"= "#666666",
"DN" = "#FDCB89",
"DP" = "#9ecae1",
"SP" = "#7FC97F",
"γδT" = "#B6B1C9",
"Mesenchymal" = "#FEE791",
"Myeloid" = "#F2F59A",
"B" = "#2171b5",
"Endothelial" = "#7D449D",
"Erythrocyte" = "#CD1588",
"HSC" = "#9BB5A4",
"Innate_lymphoid" = "#EDBB99",
"Mast" = "#D1B3BB",
"Megakaryocyte" = "#9ABDA4"
)
# UMAP
Idents(seur.human) <- "Anno_curated"
fig7E_p1 <- do_DimPlot(
seur.human,
reduction = "umap",
idents.keep = hu_clusters_abundant,
na.value = "grey90",
legend.position = "right",
legend.ncol = 1,
font.size = 30,
legend.icon.size = 10
) +
scale_color_manual(values = celltypes_col)
# if needed to rasterise to export plot
# fig7E_p1 <- ggrastr::rasterise(fig7E_p1, layers = "Point", dpi = 300)
Plot CD1D expression on FeaturePlot.
# Plot CD1d expression
fig7E_p2 <- do_FeaturePlot(
seur.human,
features = "CD1D",
order = T,
plot.title = "CD1D expression",
border.color = "black",
border.size = 2,
reduction = "umap",
pt.size = 1.2,
legend.position = "right",
font.size = 30
) &
scale_colour_scico(
palette = "lapaz",
alpha = 0.8,
begin = 0.1,
end = 0.85,
direction = -1
)
# if needed to rasterise to export plot
# fig7E_p2 <- ggrastr::rasterise(fig7E_p2, layers = "Point", dpi = 300)
Combine plots (Fig 7E).
fig7E_p1 | fig7E_p2
To note, in the manuscript, several clusters were removed (HSC, Mast, Erythrocyte, Megakaryocyte, Endothelial) because very few cells were captured, and they had very little level of CD1D, SLAMF1 or SLAMF6 detected.
DotPlot(
seur.human,
features = rev(c("CD1D", "SLAMF1", "SLAMF6")),
group.by = "Anno_curated",
cols = c("lightgrey", "darkred"),
col.min = 0,
dot.scale = 10
) +
coord_flip() +
theme_cowplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "")
# remove GEMM
seur.mouse <- subset(seur.mouse, age != "Rag1KO") # 34,073 cells
# adapt cell annotation
seur.mouse@meta.data$Anno_curated <- case_when(
seur.mouse@meta.data$cell.types=="B" ~ "B",
seur.mouse@meta.data$cell.types %in% c("CD4+T", "CD8+T", "Treg", "αβT(entry)") ~ "SP",
seur.mouse@meta.data$cell.types %in% c("DN(P)", "DN(Q)") ~ "DN",
seur.mouse@meta.data$cell.types %in% c("DP(P)", "DP(Q)") ~ "DP",
seur.mouse@meta.data$cell.types=="Endo" ~ "Endothelial",
seur.mouse@meta.data$cell.types=="Ery" ~ "Erythrocyte",
seur.mouse@meta.data$cell.types %in% c("Fb", "VSMC") ~ "Mesenchymal",
seur.mouse@meta.data$cell.types %in% c("HSC", "NMP") ~ "HSC",
seur.mouse@meta.data$cell.types %in% c("IELpA", "IELpB/NKT", "NK") ~ "Innate_lymphoid",
seur.mouse@meta.data$cell.types %in% c("DC1", "DC2", "aDC", "pDC") ~ "Myeloid",
seur.mouse@meta.data$cell.types %in% c("Mac", "Mono") ~ "Myeloid",
seur.mouse@meta.data$cell.types %in% c("Epi_unknown", "TEC_early") ~ "TEC_other",
seur.mouse@meta.data$cell.types == "cTEC" ~ "cTEC",
seur.mouse@meta.data$cell.types == "mTEC" ~ "mTEC",
seur.mouse@meta.data$cell.types == "γδT" ~ "γδT"
)
# sanity checks
table(seur.mouse@meta.data$Anno_curated, useNA="ifany")
##
## B cTEC DN DP Endothelial
## 207 2917 6288 16104 55
## Erythrocyte HSC Innate_lymphoid Mesenchymal mTEC
## 123 89 878 1024 215
## Myeloid SP TEC_other γδT
## 379 5020 340 434
table(seur.mouse@meta.data[,c("Anno_curated", "cell.types")], useNA="ifany")
## cell.types
## Anno_curated B CD4+T CD8+T DC1 DC2 DN(P) DN(Q) DP(P) DP(Q) Endo
## B 207 0 0 0 0 0 0 0 0 0
## cTEC 0 0 0 0 0 0 0 0 0 0
## DN 0 0 0 0 0 4178 2110 0 0 0
## DP 0 0 0 0 0 0 0 7437 8667 0
## Endothelial 0 0 0 0 0 0 0 0 0 55
## Erythrocyte 0 0 0 0 0 0 0 0 0 0
## HSC 0 0 0 0 0 0 0 0 0 0
## Innate_lymphoid 0 0 0 0 0 0 0 0 0 0
## Mesenchymal 0 0 0 0 0 0 0 0 0 0
## mTEC 0 0 0 0 0 0 0 0 0 0
## Myeloid 0 0 0 22 151 0 0 0 0 0
## SP 0 1483 815 0 0 0 0 0 0 0
## TEC_other 0 0 0 0 0 0 0 0 0 0
## γδT 0 0 0 0 0 0 0 0 0 0
## cell.types
## Anno_curated Epi_unknown Ery Fb HSC IELpA IELpB/NKT Mac Mono NK
## B 0 0 0 0 0 0 0 0 0
## cTEC 0 0 0 0 0 0 0 0 0
## DN 0 0 0 0 0 0 0 0 0
## DP 0 0 0 0 0 0 0 0 0
## Endothelial 0 0 0 0 0 0 0 0 0
## Erythrocyte 0 123 0 0 0 0 0 0 0
## HSC 0 0 0 88 0 0 0 0 0
## Innate_lymphoid 0 0 0 0 285 185 0 0 408
## Mesenchymal 0 0 952 0 0 0 0 0 0
## mTEC 0 0 0 0 0 0 0 0 0
## Myeloid 0 0 0 0 0 0 90 74 0
## SP 0 0 0 0 0 0 0 0 0
## TEC_other 76 0 0 0 0 0 0 0 0
## γδT 0 0 0 0 0 0 0 0 0
## cell.types
## Anno_curated NMP TEC_early Treg VSMC aDC cTEC mTEC pDC αβT(entry) γδT
## B 0 0 0 0 0 0 0 0 0 0
## cTEC 0 0 0 0 0 2917 0 0 0 0
## DN 0 0 0 0 0 0 0 0 0 0
## DP 0 0 0 0 0 0 0 0 0 0
## Endothelial 0 0 0 0 0 0 0 0 0 0
## Erythrocyte 0 0 0 0 0 0 0 0 0 0
## HSC 1 0 0 0 0 0 0 0 0 0
## Innate_lymphoid 0 0 0 0 0 0 0 0 0 0
## Mesenchymal 0 0 0 72 0 0 0 0 0 0
## mTEC 0 0 0 0 0 0 215 0 0 0
## Myeloid 0 0 0 0 11 0 0 31 0 0
## SP 0 0 49 0 0 0 0 0 2673 0
## TEC_other 0 264 0 0 0 0 0 0 0 0
## γδT 0 0 0 0 0 0 0 0 0 434
# define order to plot
seur.mouse@meta.data$Anno_curated <- factor(
seur.mouse@meta.data$Anno_curated,
levels = c(
"DN",
"DP",
"SP",
"γδT",
"cTEC",
"mTEC",
"TEC_other",
"Innate_lymphoid",
"B",
"Myeloid",
"Mesenchymal",
"HSC",
"Erythrocyte",
"Endothelial"
)
)
Determine which clusters are most abundant.
# see how many cells there are per cluster
as.data.frame(table(seur.mouse$Anno_curated)) %>%
mutate(totalcells=sum(Freq),
percentcells=Freq*100/totalcells) %>%
arrange(percentcells)
## Var1 Freq totalcells percentcells
## 1 Endothelial 55 34073 0.1614181
## 2 HSC 89 34073 0.2612039
## 3 Erythrocyte 123 34073 0.3609896
## 4 B 207 34073 0.6075192
## 5 mTEC 215 34073 0.6309982
## 6 TEC_other 340 34073 0.9978575
## 7 Myeloid 379 34073 1.1123177
## 8 γδT 434 34073 1.2737358
## 9 Innate_lymphoid 878 34073 2.5768204
## 10 Mesenchymal 1024 34073 3.0053121
## 11 cTEC 2917 34073 8.5610307
## 12 SP 5020 34073 14.7330731
## 13 DN 6288 34073 18.4544948
## 14 DP 16104 34073 47.2632289
ms_clusters_abundant <- levels(seur.mouse@meta.data$Anno_curated)[!levels(seur.mouse@meta.data$Anno_curated) %in% c("Endothelial", "HSC", "Erythrocyte")]
Plot UMAP with cluster annotation.
fig7A_p1 <- do_DimPlot(
seur.mouse,
reduction = "umap",
group.by = "Anno_curated",
idents.keep = ms_clusters_abundant,
na.value = "grey90",
legend.position = "right",
legend.ncol = 1,
font.size = 30,
legend.icon.size = 10
) +
scale_color_manual(values = celltypes_col)
# if needed to rasterise to export plot
# fig7A_p1 <- ggrastr::rasterise(fig7A_p1, layers="Point", dpi=300)
Plot Cd1d1 expression on FeaturePlot.
fig7A_p2 <- do_FeaturePlot(
seur.mouse,
features = "Cd1d1",
plot.title = "Cd1d1 expression",
order = T,
border.color = "black",
border.size = 2,
reduction = "umap",
pt.size = 1.2,
legend.position = "right",
font.size = 30
) &
scale_colour_scico(
palette = "lapaz",
alpha = 0.8,
begin = 0.1,
end = 0.85,
direction = -1
)
# if needed to rasterise to export plot
# fig7A_p2 <- ggrastr::rasterise(fig7A_p2, layers="Point", dpi=300)
Combine plots (Fig 7A).
fig7A_p1 | fig7A_p2
DotPlot(
seur.mouse,
features = rev(c("Cd1d1", "Slamf1", "Slamf6")),
group.by = "Anno_curated",
cols = c("lightgrey", "darkred"),
col.min = 0,
dot.scale = 10
) +
coord_flip() +
theme_cowplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "")
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scico_1.5.0.9000 patchwork_1.1.2 SCpubr_2.0.2 SeuratObject_4.1.3
## [5] Seurat_4.3.0.1 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2
## [9] purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [13] tidyverse_1.3.2 cowplot_1.1.1 RColorBrewer_1.1-3 ggplot2_3.4.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.2 backports_1.4.1 plyr_1.8.8
## [4] igraph_1.5.0 lazyeval_0.2.2 sp_2.0-0
## [7] splines_4.1.3 listenv_0.9.0 scattermore_1.2
## [10] digest_0.6.32 yulab.utils_0.0.6 htmltools_0.5.5
## [13] viridis_0.6.3 fansi_1.0.4 magrittr_2.0.3
## [16] tensor_1.5 googlesheets4_1.1.1 cluster_2.1.4
## [19] ROCR_1.0-11 tzdb_0.4.0 globals_0.16.2
## [22] modelr_0.1.11 matrixStats_1.0.0 timechange_0.2.0
## [25] spatstat.sparse_3.0-2 colorspace_2.1-0 rvest_1.0.3
## [28] ggrepel_0.9.3 haven_2.5.3 xfun_0.39
## [31] crayon_1.5.2 jsonlite_1.8.7 progressr_0.13.0
## [34] spatstat.data_3.0-1 survival_3.5-5 zoo_1.8-12
## [37] glue_1.6.2 polyclip_1.10-4 gtable_0.3.3
## [40] gargle_1.5.1 leiden_0.4.3 future.apply_1.11.0
## [43] abind_1.4-5 scales_1.2.1 DBI_1.1.3
## [46] spatstat.random_3.1-5 miniUI_0.1.1.1 Rcpp_1.0.10
## [49] viridisLite_0.4.2 xtable_1.8-4 gridGraphics_0.5-1
## [52] reticulate_1.30 htmlwidgets_1.6.2 httr_1.4.6
## [55] ellipsis_0.3.2 ica_1.0-3 farver_2.1.1
## [58] pkgconfig_2.0.3 sass_0.4.6 uwot_0.1.16
## [61] dbplyr_2.3.2 deldir_1.0-9 utf8_1.2.3
## [64] labeling_0.4.2 ggplotify_0.1.1 tidyselect_1.2.0
## [67] rlang_1.1.1 reshape2_1.4.4 later_1.3.1
## [70] munsell_0.5.0 cellranger_1.1.0 tools_4.1.3
## [73] cachem_1.0.8 cli_3.6.3 generics_0.1.3
## [76] broom_1.0.5 ggridges_0.5.4 evaluate_0.21
## [79] fastmap_1.1.1 yaml_2.3.7 goftest_1.2-3
## [82] knitr_1.43 fs_1.6.2 fitdistrplus_1.1-11
## [85] RANN_2.6.1 pbapply_1.7-2 future_1.33.0
## [88] nlme_3.1-162 mime_0.12 xml2_1.3.4
## [91] compiler_4.1.3 rstudioapi_0.14 plotly_4.10.2
## [94] png_0.1-8 spatstat.utils_3.0-3 reprex_2.0.2
## [97] bslib_0.5.0 stringi_1.7.12 highr_0.10
## [100] lattice_0.21-8 Matrix_1.5-4.1 vctrs_0.6.3
## [103] pillar_1.9.0 lifecycle_1.0.3 spatstat.geom_3.2-1
## [106] lmtest_0.9-40 jquerylib_0.1.4 RcppAnnoy_0.0.21
## [109] data.table_1.14.8 irlba_2.3.5.1 httpuv_1.6.11
## [112] R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-21
## [115] gridExtra_2.3 parallelly_1.36.0 codetools_0.2-19
## [118] assertthat_0.2.1 MASS_7.3-60 withr_2.5.0
## [121] sctransform_0.3.5 parallel_4.1.3 hms_1.1.3
## [124] grid_4.1.3 rmarkdown_2.23 googledrive_2.1.1
## [127] Rtsne_0.16 spatstat.explore_3.2-1 shiny_1.7.4
## [130] lubridate_1.9.2